{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 下载PixivCrawler抓取到的图片"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import random\n",
    "import sqlite3 as lite\n",
    "import time\n",
    "\n",
    "import pandas as pd\n",
    "from sqlalchemy import create_engine\n",
    "\n",
    "from pixivpy3 import *\n",
    "\n",
    "try:\n",
    "    from tqdm.notebook import tqdm  # new tqdm\n",
    "except:\n",
    "    from tqdm import tqdm_notebook as tqdm\n",
    "\n",
    "\n",
    "class PixivDownloader:\n",
    "    def __init__(self, illust_db=\"pixiv_illusts.db\"):\n",
    "        self.illust_db = illust_db\n",
    "\n",
    "    def DBIllusts(self, sql=\"SELECT * FROM illusts WHERE illust_id > 0\"):\n",
    "        with lite.connect(self.illust_db) as conn:\n",
    "            sql_df = pd.read_sql_query(sql, conn, index_col=\"illust_id\")\n",
    "\n",
    "        # 还原json字段\n",
    "        sql_df[\"image_urls\"] = sql_df.image_urls.apply(json.loads)\n",
    "        sql_df[\"meta_pages\"] = sql_df.meta_pages.apply(json.loads)\n",
    "        sql_df[\"meta_single_page\"] = sql_df.meta_single_page.apply(json.loads)\n",
    "        sql_df[\"series\"] = sql_df.series.apply(json.loads)\n",
    "        sql_df[\"tags\"] = sql_df.tags.apply(json.loads)\n",
    "        sql_df[\"tools\"] = sql_df.tools.apply(json.loads)\n",
    "        sql_df[\"user\"] = sql_df.user.apply(json.loads)\n",
    "        return sql_df\n",
    "\n",
    "    def randSleep(self, base=0.1, rand=0.5):\n",
    "        \"休眠随机的时间\"\n",
    "        time.sleep(base + rand * random.random())\n",
    "\n",
    "    def getImageUrl(self, illust, origin=True):\n",
    "        if origin:\n",
    "            return illust[\"meta_single_page\"].get(\"original_image_url\", illust[\"image_urls\"][\"large\"])\n",
    "        else:  # square\n",
    "            return illust[\"image_urls\"][\"square_medium\"]\n",
    "\n",
    "    def StartDownload(self, path, origin=True):\n",
    "        if not os.path.exists(path):\n",
    "            os.mkdir(path)\n",
    "\n",
    "        api = AppPixivAPI(timeout=3)\n",
    "        df = self.DBIllusts()\n",
    "        for _, illust in tqdm(df.iterrows(), total=df.shape[0]):\n",
    "            image_url = self.getImageUrl(illust, origin)\n",
    "            for i in range(3):\n",
    "                try:\n",
    "                    if api.download(image_url, path=path):\n",
    "                        self.randSleep()\n",
    "                    break\n",
    "                except Exception as e:\n",
    "                    print(f\">> Download {image_url} failed: {e}\")\n",
    "\n",
    "\n",
    "dl = PixivDownloader()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# download original images\n",
    "df = dl.StartDownload(path=\"./illusts\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# download square images\n",
    "dl.StartDownload(path=\"squares\", origin=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
